
# Purpose : Visualization of data
# Output  : (1) Creates maps included in the paper 
# Output  : (2) Revealed comparative advantage graph
# Updated : March 18, 2025

#==============================
# SET UP ####
#==============================
rm(list=ls(all=TRUE))
gc()

options(stringsAsFactors = FALSE)
options(scipen = 3)
set.seed(1991)

# Packages
require(tidyverse)
require(openintro)
require(sf)

#install.packages('devtools')
#require(devtools)
#devtools::install_github("dkahle/ggmap",force=T)
require(ggmap)

library(usdata)
library(tidycensus)
library(usmap)

#==============================
# Maps ####
#==============================

## 1. Geographic data ####
# This function returns congressional district boundaries 
get_congress_map <- function(cong) {
  tmp_file <- tempfile()
  tmp_dir  <- tempdir()
  zp <- sprintf("http://cdmaps.polisci.ucla.edu/shp/districts%03i.zip",cong)
  download.file(zp, tmp_file)
  unzip(zipfile = tmp_file, exdir = tmp_dir)
  fpath <- paste(tmp_dir, sprintf("districtShapes/districts%03i.shp",cong), sep = "/")
  st_read(fpath)
}

# load districts from 109th congresss
cd109 <- get_congress_map(109)

# load districts from 110th congresss
cd110 <- get_congress_map(110)

# load districts from 111th congresss
cd111 <- get_congress_map(111)

# load districts from 112th congresss
cd112 <- get_congress_map(112)

# Load districts from 113th congress
cd113 <- get_congress_map(113)

# Load districts from 114th congress
cd114 <- get_congress_map(114)


## 2. Loading LCV Votesc####
d <- read.csv("clean/cs_dta_all.csv")[,-1]

d$state <- substr(d$state_CD, 1,2)
d$district <- substr(d$state_CD, 4,5)

#lcv scores
ds <- d %>% select(state, district, congress, pro_percentage_per_year) %>% 
  unique() %>% group_by(state, district, congress) %>% 
  summarize(mean_congress = mean(pro_percentage_per_year))
#unifying the state name format
ds$STATENAME <- abbr2state(ds$state) # this is from usdata package
ds$district <- as.character(ds$district)

#ipw (trade shock)
di <- d %>% select(state, district, congress, ipw) %>% 
  unique() %>% group_by(state, district, congress) %>% 
  summarize(mean_congress = mean(ipw))
#unifying the state name format
di$STATENAME <- abbr2state(di$state) # this is from usdata package
di$district <- as.character(di$district)


# Visualizations ####
# 1. ipw MAP ####
# Function to merge and plot ####

# Subset data for i-th Congress, merge with geocode data, and plot
plot_congress <- function(congress_num, cd, title, xlim, ylim){
  ipw <- di %>%
    filter(congress == congress_num) %>%
    select(mean_congress, state, district) %>%
    rename(DISTRICT2 = district) 
  
  # Merge in geocode data
  cd_usai <- cd %>% 
    mutate(ddd = case_when(
      DISTRICT == "0" ~ "AL",
      TRUE ~ DISTRICT
    )) %>% 
    mutate(DISTRICT2 = case_when(
      substr(ddd, 2,2) == "" ~ paste("0", substr(ddd,1,1), sep=""),
      TRUE ~ ddd)
    ) %>%
    select(DISTRICT2, STATENAME) %>%
    mutate(state = state2abbr(STATENAME)) %>%
    left_join(ipw, by = c("DISTRICT2", "state"))
  
  # Plotting
  cd_usai %>%
    ggplot() + 
    geom_sf(aes(fill = mean_congress), alpha = 0.9) + 
    scale_fill_gradient(low = "white", high = "black") +
    coord_sf(xlim = xlim, ylim = ylim)+
    theme_void() +
    labs(title = title, fill = "Average Import Shock Per Congressional District") +  
    theme(
      plot.title = element_text(size = 18, hjust = 0.5),
      legend.position = "bottom"  # Move legend to bottom
    )
}


# 2. LCV MAP ####
# Function to merge and plot ####

# Subset data for i-th Congress, merge with geocode data, and plot
plot_congress_e <- function(congress_num, cd, title, xlim, ylim){
  env <- ds %>%
    filter(congress == congress_num) %>%
    select(mean_congress, state, district) %>%
    rename(DISTRICT2 = district) 
  
  # Merge in geocode data
  cd_usa <- cd %>% 
    mutate(ddd = case_when(
      DISTRICT == "0" ~ "AL",
      TRUE ~ DISTRICT
    )) %>% 
    mutate(DISTRICT2 = case_when(
      substr(ddd, 2,2) == "" ~ paste("0", substr(ddd,1,1), sep=""),
      TRUE ~ ddd)
    ) %>%
    select(DISTRICT2, STATENAME) %>%
    mutate(state = state2abbr(STATENAME)) %>%
    left_join(env, by = c("DISTRICT2", "state"))
  
  # Plotting
  cd_usa %>%
    ggplot() + 
    geom_sf(aes(fill = mean_congress), alpha = 0.9) + 
    #scale_fill_gradient(low = "#E69F00", high = "#009E73") +
    scale_fill_gradient(low = "white", high = "black") +
    coord_sf(xlim = xlim, ylim = ylim)+
    theme_void() +
    labs(title = title, fill = "Average Pro-environmental Score") +  
    theme(
      plot.title = element_text(size = 18, hjust = 0.5),
      legend.position = "bottom"  # Move legend to bottom
    )
}

# 3. Output ####
## In the paper ####
jpeg("figures/env_score_map_109_bw.jpeg")
plot_congress_e(congress_num = 109, cd=cd109, title = "Pro-Environmental Score", xlim = c(-125,-67), ylim = c(25,50))
dev.off()

jpeg("figures/ipw_map_109_bw.jpeg")
plot_congress(congress_num = 109, cd=cd109, title = "Import Shock", xlim = c(-125,-67), ylim = c(25,50))
dev.off()

#==============================
# RCA ####
# Appendix C
#==============================
## Revealed comparative advantage/disadvantage

d_RCA <- read_csv("clean/US_RCA_20231009020405.csv")
# RCA codebook: https://unctadstat.unctad.org/datacentre/reportInfo/US.RCA

d_RCA <- d_RCA %>% 
  filter(`Economy Label` == "China" | 
           `Economy Label` == "United States of America") %>% 
  select(`Economy Label`, Year, Index, Product)

colnames(d_RCA) <- c("Country", "Year", "RCA", "Product")

d_RCAwide <- pivot_wider(d_RCA, id_cols =  c(Year, Product),
                         names_from = Country,
                         values_from = RCA) %>% 
  rename(USA=`United States of America`) %>% 
  mutate(USA_advantage = case_when(
    USA > China ~1,
    TRUE ~0
  ))

# paper mill, paper articles man. (752)
d_RCAs <- d_RCA %>% filter(Product == "752") %>% select(-Product)
ggplot(d_RCAs, aes(x=Year, y=RCA, color = Country, linetype = Country)) +
  geom_line(size = 1.2) + 
  geom_vline(xintercept=2001, linetype="dotted", 
             color = "black", size=0.5) +
  theme_bw() +
  theme(legend.position = "none")+
  annotate(geom="text", x= 2020, y=2.7, label = "China") +
  annotate(geom="text", x= 2020, y=1, label = "USA") 

ggsave("figures/RCA.jpeg", width = 400/96, height = 300/96, dpi = 600, units = "in")

# checking employment level in paper industry
emp <- read.csv("clean/emp.csv")[,-1]
# paper manufacturing is 322/// NAICS code
emp_paper <- emp %>% filter(naics == "322///")
emp_paper_y <- emp_paper %>% group_by(year) %>% tally(emp)
emp_paper_y %>% filter(year >= 1999) %>% 
  ggplot(., aes(x=year, y=n)) +
  geom_line(size = 1.2) +
  labs(x="Year", y="Emp.") +
  theme_bw()
ggsave("figures/emp_paper.jpeg", width = 400/96, height = 300/96, dpi = 600, units = "in")
